
* This do-file calculates several parameters used for the structural analysis 


* Root of the replication folder. Every file reference below is built as
* "${path}/...", so this must be filled in before the do-file is run.
global path "" // Set this to the path for the replication folder on your machine

* Start from an empty dataset and bring the Results window to the front.
clear
window manage forward results


************************************************************
* Obtain structural moments and their bootstrapped variances
************************************************************

* Runs the auxiliary bootstrap do-file. Its contents are not visible here;
* per the banner above it produces the structural moments and their
* bootstrapped variances.

 do "${path}/Do-files/auxiliary/bootstrap.do"



************************************************************
* Numerical simulation to calculate conditional expectations
************************************************************

* Reset the session state used by the simulation: empty dataset, no scalars,
* and a fixed seed so the draws are reproducible.
clear
scalar drop _all
set seed 123456

* Number of simulated draws per cost distribution
global obsN = 100000

* Bounds of the five uniform cost distributions, U(a_k, b_k).
* The values are calculated in the spreadsheet "CostDistribution.xlsx".
*
*    k   implied by     a_k     b_k
*    1   1/1           -290    2254
*    2   1.1/1.1       -319    2480
*    3   1.2/1.2       -348    2705
*    4   1/1.1          -61    2229
*    5   1.1/1.2        -90    2454
local lower -290 -319 -348 -61 -90
local upper 2254 2480 2705 2229 2454

forvalues k = 1/5 {
	local lo : word `k' of `lower'
	local hi : word `k' of `upper'
	scalar a`k' = `lo'
	scalar b`k' = `hi'
}

* Actual reporting rates by treatment arm
scalar r_worker  = 0.900
scalar r_shared  = 0.507
scalar r_supv    = 0.409
scalar r_control = 0.234

* Perceived reporting rates, March 2024
scalar r_hat_worker  = 0.88
scalar r_hat_shared  = 0.77
scalar r_hat_supv    = 0.59
scalar r_hat_control = 0.23

* Convert each rate into an observation index (rate x number of draws).
* The index is stored as text in the locals d_<arm> and d_hat_<arm> and is
* used later to pick the corresponding order statistic of the sorted draws.
foreach arm in worker shared supv control {
	local d_`arm' : display %4.0f r_`arm' * $obsN
	local d_hat_`arm' : display %4.0f r_hat_`arm' * $obsN
}

************
* Simulation
************

* For each of the five cost distributions, draw $obsN costs from
* U(a_k, b_k) and store the mean cost below each reporting threshold.
*
* Results are left in the locals m<j>_<k>, k = 1..5:
*    j = 1..4  actual rates    (worker, supv, shared, control)
*    j = 5..8  perceived rates (worker, supv, shared, control)
* Each mean is divided by 100 and formatted with %4.3f.
forvalues dist = 1/5 {

	* Work on a scratch dataset; restore puts back whatever was in memory
	preserve
	set obs $obsN
	generate u = runiform(a`dist', b`dist')
	sort u

	* Thresholds: the d-th smallest draw, where d is the observation index
	* implied by the actual (t1..t4) or perceived (that1..that4) rate
	local k = 0
	foreach arm in worker supv shared control {
		local ++k
		quietly summarize u if _n == `d_`arm''
		local t`k' = `r(mean)'
		quietly summarize u if _n == `d_hat_`arm''
		local that`k' = `r(mean)'
	}

	* Mean cost among draws strictly below each threshold
	forvalues k = 1/4 {
		local khat = `k' + 4

		* ... for actual reporting rates
		quietly summarize u if u < `t`k''
		local m`k'_`dist' : display %4.3f `r(mean)'/100

		* ... for perceived reporting rates
		quietly summarize u if u < `that`k''
		local m`khat'_`dist' : display %4.3f `r(mean)'/100
	}

	restore
}
* Collect the simulated moments in a table and export it to Excel.
*
* Layout: one column per cost distribution (sim1..sim5) and one row per
* moment m1..m8 (rows 1-4: actual reporting rates, rows 5-8: perceived
* reporting rates), taken from the locals m<row>_<column>.
clear

* Eight rows are needed because the inner loop below fills rows 1..8.
* With only 5 observations the assignments to rows 6-8 silently changed
* nothing, so those moments never reached the exported file.
* NOTE(review): the sheet now has 8 data rows instead of 5 - confirm that
* the downstream reader of ReportingCosts1.xlsx does not assume 5 rows.
set obs 8

forvalues v = 1/5 {

	generate sim`v' = .

	forvalues d = 1/8 {
		quietly replace sim`v' = `m`d'_`v'' in `d'
	}
}

 export excel using "${path}/Do-files/matlab/results/ReportingCosts1.xlsx" , replace firstrow(var)
 
 
 *********************************
 * 2. Simulation with halved costs
 *********************************
 
* Reset state for the second simulation: empty dataset, no scalars, and the
* same seed as the first simulation.
clear
set seed 123456
scalar drop _all

* Number of simulated draws
global obsg = 100000

* HALVE COSTS: Parameters implied by assumption of no over-reporting in worker and 10pct over in shared incentives
* These parameters are calculated in the spreadsheet "CostDistribution.xlsx"

* Bounds of the baseline uniform cost distribution U(a, b)
scalar a = -61
scalar b = 2229

* actual reporting rates
scalar r_worker = 0.900
scalar r_shared = 0.507
scalar r_supv =  0.409
scalar r_control = 0.234


* Observation index implied by each reporting rate (rate x number of draws).
* The index uses $obsg rather than a hard-coded 100000 so that it always
* matches the number of observations actually drawn with "set obs $obsg".
foreach v in worker shared supv control {
	local d_`v' : di %4.0f  r_`v' * $obsg
}


* Simulation


* Baseline draw: $obsg costs from U(a, b), sorted so that observation d is
* the d-th smallest cost.
set obs $obsg
generate u = runiform(a, b)
sort u


* Thresholds for the four arms, in the order
*    t1 = worker, t2 = shared, t3 = supv, t4 = control.
* summarize is not run quietly here, so its output appears in the results.
local k = 0
foreach arm in worker shared supv control {
	local ++k
	summarize u if _n == `d_`arm''
	local t`k' = `r(mean)'
}


* Mean baseline cost (divided by 100) among draws below each threshold
forvalues k = 1/4 {
	quietly summarize u if u < `t`k''
	local bm`k' : display %4.3f `r(mean)'/100
}


display "Costs implied by actual rates"
display "worker incentive `bm1'"
display "supv incentive `bm3'"
display "shared incentive `bm2'"
display "control `bm4'"


* New draw from the new cost distribution U(c, d). The lower bound is
* unchanged and the upper bound is chosen so that the mean is halved:
* (-61 + 1145)/2 = 542 versus (-61 + 2229)/2 = 1084.
scalar c = -61
scalar d = 1145
generate u2 = runiform(c, d)
sort u2


* Holding the old thresholds fixed, compute under the new distribution
*    r<k>: share of draws below threshold k (the implied reporting rate)
*    m<k>: mean cost (divided by 100) among those draws
forvalues k = 1/4 {
	quietly count if u2 < `t`k''
	local r`k' : display %4.3f `r(N)'/$obsg

	quietly summarize u2 if u2 < `t`k''
	local m`k' : display %4.3f `r(mean)'/100
}


display "Reporting rates when costs have been halved"
display "worker incentive `r1'"
display "supv incentive `r3'"
display "shared incentive `r2'"
display "control `r4'"


display "Costs when costs have been halved"
display "worker incentive `m1'"
display "supv incentive `m3'"
display "shared incentive `m2'"
display "control `m4'"


* Build a 4-row table of the halved-cost results and export it to Excel.
* Row k holds the reporting rate r<k> and the mean cost m<k> for threshold k.
clear
set obs 4

generate rate = .
generate cost = .

forvalues row = 1/4 {
	quietly replace rate = `r`row'' in `row'
	quietly replace cost = `m`row'' in `row'
}

 export excel using "${path}/Do-files/matlab/results/ReportingCosts2.xlsx" , replace firstrow(var)

 
 *******************
 *** 3. Calculate Z
 *******************
clear
use "${path}/Data/data_all_final.dta", clear

* a) s: transfer from supervisor to worker PER VISIT per month.
*    HH_nb_visits_E covers a 6-month span, so it is divided by 6 first.

	generate s = WK_transfer_fromSUPV_E / (HH_nb_visits_E / 6)

* b) p: share of the incentive that goes to the worker.
*    0 without incentives or under supervisor incentives, 0.5 under shared
*    incentives, 1 under worker incentives, missing otherwise. The nesting
*    order reproduces the precedence of overlapping indicators.

	generate p = cond(incentives == 0 | supervisor == 1, 0, ///
	             cond(shared == 1, 0.5, ///
	             cond(worker == 1, 1, .)))

* c) m: set to 2 for every observation

	quietly generate m = 2

* d) m*p, adjusted by the reporting rate.
*    For each visit provided, the worker receives on average 2k, but only if
*    the visit is reported => 2k * reporting rate [avg of .465]. This
*    adjustment is not strictly necessary, but it is desirable: z and the
*    reporting rate interact in the constant of this model, but to avoid
*    using information on the absolute level of side payments (which is
*    noisily measured), we do not use any information coming from the
*    constant to pin down the level of z.

	generate mp_adj = m * p * .465

* e) Coefficient of s on mp_adj, stored in every row of the variable coef

	regress s mp_adj
	generate coef = _b[mp_adj]

* f) Solve for z from the slope:
*
*    s = a - [(z+1)/2z]*m*p
*    coef of s on m*p = -[(z+1)/2z]
*    [(z+1)/2z]       = -coef
*    (z+1)            = -coef*2z
*    1                = [(-coef*2) - 1]z
*    z                = 1/[(-coef*2) - 1]

	quietly generate z = 1 / ((-2 * coef) - 1)

* z is constant across rows, so its mean is the value itself; it is stored
* in the global z with one decimal and displayed.
	quietly summarize z
	global z : display %5.1f `r(mean)'
	display "z is $z"
* z is   3.6

	
 